/* Copyright (c) 2008, Nozomi SATO. All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. Neither the name of Nozomi Sato nor the names of its contributors
 *     may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Number of fractional bits in the fixed-point coefficients: every
 * multiply-accumulate sum in this file is shifted right (arithmetically)
 * by this amount before being stored.
 * NOTE(review): it is defined ahead of the include below, presumably so
 * that rgb_to_yuv_constants.h can scale the C_* values by it -- confirm.
 */
#define PRECISION_SHIFT     13

#include "rgb_to_yuv_constants.h"

/* Arguments, in the order given by the prototype comment above the entry point. */
#define in_ptr			r0
#define outptr			r1
#define width			r2

/* Holds the constant 128 that is added to the U and V results. */
#define const128		r3

/* Components of the pixel currently being converted. */
#define R				r4
#define G				r5
#define B				r6
/* c1..c5 each carry two packed 16-bit coefficients loaded from .LWORD. */
#define c1				r7
#define c2				r8
#define c3				r9
#define c4				r10
#define c5				r11

/*
 * k1..k9 name the nine coefficients in table order.  Two names share one
 * register: the odd-numbered name is the bottom halfword and the
 * even-numbered name is the top halfword.  The half is selected by the
 * multiply mnemonic used with it (a trailing "b" or "t"), not by the alias.
 */
#define k1				c1
#define k2				c1
#define k3				c2
#define k4				c2
#define k5				c3
#define k6				c3
#define k7				c4
#define k8				c4
#define k9				c5

/* Scratch register / accumulator; r12 is not saved by the entry point. */
#define tmp				r12


/**
 * ARM Architecture Reference Manual A3.2.1
 * EQ  		Equal 								Z set
 * NE		Not Equal							Z clear
 * CS/HS	Carry set/unsigned higher or same	C set
 * CC/LO	Carry clear/unsigned lower			C clear
 * MI		Minus/negative						N set
 * PL		Plus/positive or zero				N clear
 * VS		Overflow							V set
 * VC		No overflow							V clear
 * HI		Unsigned higher						C set and Z clear
 * LS		Unsigned lower or same				C clear or Z set
 * GE		Signed greater than or equal		N set and V set, or
 *												N clear and V clear(N==V)
 * LE		Signed less than or equal			Z set, or N set and V clear, or
 *												N clear and V set(Z==1 or N!=V)
 * AL		always(unconditional)				-
 */

/*
 Note on the S suffix (as used by SUBS in the loop below):
  If <Rd> is not R15, the N and Z flags are set according to the result of the subtraction, and the C and V flags are set according to whether the subtraction generated a borrow (unsigned underflow) and a signed overflow, respectively. The rest of the CPSR is unchanged.
*/

	/*
	 * get_rgb src_offset
	 *
	 * Load the pixel that starts src_offset bytes past in_ptr and leave its
	 * three components, each in the range 0..255, in R, G and B.
	 *
	 * RGB_BPP == 4: the pixel is fetched with a single word load; each
	 *               component is moved down to bits 7..0 by its
	 *               B_SHIFT / G_SHIFT / R_SHIFT bit position and masked
	 *               with 0xff.  Clobbers tmp (used for the mask).
	 * RGB_BPP == 3: three byte loads at B_OFFSET / G_OFFSET / R_OFFSET.
	 *
	 * in_ptr is not modified in either variant.  The conversion loop
	 * advances it once per iteration, so the load must not write the
	 * offset back into the base register.
	 */
	.macro	get_rgb, src_offset
#if (RGB_BPP == 4)
	ldr		R, [in_ptr, #\src_offset]
	mov     tmp, #0xff
	@ Shift right, not left: a left shift would move the wanted byte away
	@ from bits 7..0 and the mask would always yield 0.
	and     B, tmp, R, lsr #B_SHIFT
	and     G, tmp, R, lsr #G_SHIFT
	@ R still holds the packed word, so it has to be extracted last.
	and     R, tmp, R, lsr #R_SHIFT
#elif (RGB_BPP == 3)
	ldrb	B, [in_ptr, #B_OFFSET + \src_offset]
	ldrb	G, [in_ptr, #G_OFFSET + \src_offset]
	ldrb	R, [in_ptr, #R_OFFSET + \src_offset]
#else
#error "invalid RGB_BPP"
#endif
	.endm

	/*
	 * set_y dst_offset
	 *
	 * Compute the luma of the pixel currently held in R, G and B,
	 *     Y = 0.299R + 0.587G + 0.114B
	 * as a fixed-point sum of products, drop the PRECISION_SHIFT
	 * fractional bits and store the low byte at outptr + dst_offset.
	 * Clobbers tmp; R, G, B and outptr are left untouched.
	 */
	.macro	set_y, dst_offset
	smulbb	tmp, R, k1							@ acc  = R * K1 (bottom half of c1)
	smlabt	tmp, G, k2, tmp						@ acc += G * K2 (top half of c1)
	smlabb	tmp, B, k3, tmp						@ acc += B * K3 (bottom half of c2)
	mov		tmp, tmp, asr #PRECISION_SHIFT		@ discard the fractional bits
	strb	tmp, [outptr, #\dst_offset]
	.endm

	.text
/*
 * FUNC_NAME(const u8 *src, u8 *dst, u32 width)
 *
 * Convert a row of packed RGB pixels to interleaved U, Y1, V, Y2 bytes.
 *
 * In:    r0 (in_ptr) = source pixels, RGB_BPP bytes each
 *        r1 (outptr) = destination
 *        r2 (width)  = pixel count
 * Saves: r4-r11 and lr are pushed on entry and restored on return.
 * Clobb: r0-r3, r12, flags.
 *
 * Each loop iteration reads two pixels and writes four bytes:
 *        outptr[0] = U  (Cb) of the first pixel
 *        outptr[1] = Y  of the first pixel
 *        outptr[2] = V  (Cr) of the first pixel
 *        outptr[3] = Y  of the second pixel
 * The chroma of the second pixel is not used.
 *
 * The loop exits as soon as fewer than PIXEL_GRANURALITY pixels remain,
 * so a trailing remainder is left unconverted.
 *
 * Written for ARM state (conditional LDM) and needs the 16x16 signed
 * multiply instructions (SMULxy / SMLAxy).
 */
	.align	4
	.global	FUNC_NAME
	.type	FUNC_NAME, %function
FUNC_NAME:
	stmfd	sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}

	@ c1..c5 <- the packed 16-bit coefficients at .LWORD (5 words)
	adr		tmp, .LWORD
	ldmfd	tmp, {c1, c2, c3, c4, c5}
	mov		const128, #128

.L1:
	subs	width, #PIXEL_GRANURALITY
	@ N set means width went negative: restore registers and return
	@ in one instruction by popping the saved lr straight into pc.
	ldmmifd	sp!, {r4, r5, r6, r7, r8, r9, sl, fp, pc} @ width - 2 < 0

.L2:
@ load first pixel
	get_rgb	0

@ U
	@ Cb =-0.169R - 0.331G + 0.500B + 128
	smulbt	tmp, R, k4		 		@ tmp =  R * -K4
	smlabb	tmp, G, k5, tmp			@ tmp += G * -K5
	smlabt	tmp, B, k6, tmp			@ tmp += B * K6
	add		tmp, const128, tmp, asr #PRECISION_SHIFT
									@ (tmp >> PRECISION_SHIFT) + 128
	strb	tmp, [outptr, #0]

@ Y1
	set_y	1

@ V
	@ Cr = 0.500R - 0.419G - 0.081B + 128
	smulbb	tmp, R, k7				@ tmp =  R * K7
	smlabt	tmp, G, k8, tmp			@ tmp += G * -K8
	smlabb	tmp, B, k9, tmp			@ tmp += B * -K9
	add		tmp, const128, tmp, asr #PRECISION_SHIFT
									@ (tmp >> PRECISION_SHIFT) + 128
	strb	tmp, [outptr, #2]

@ load second pixel
	get_rgb	RGB_BPP

@ Y2
	set_y	3

.L3:
	@ step both pointers past the pixels handled in this iteration
	add		in_ptr, #RGB_BPP * PIXEL_GRANURALITY
	add		outptr, #YUV_BPP * PIXEL_GRANURALITY
	b		.L1

	@ Coefficient table, two halfwords per register (bottom half first).
	.align	2
.LWORD:
	.short	C_0_29891				@ k1: c1 bottom
	.short	C_0_58661				@ k2: c1 top
	.short	C_0_11448				@ k3: c2 bottom
	.short	-C_0_16874				@ k4: c2 top
	.short	-C_0_33126				@ k5: c3 bottom
	.short	C_0_50000				@ k6: c3 top
	.short	C_0_50000				@ k7: c4 bottom
	.short	-C_0_41869				@ k8: c4 top
	.short	-C_0_08131				@ k9: c5 bottom
	.short	0						@ pad: ldmfd above reads five whole words
.size FUNC_NAME,.-FUNC_NAME
